{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SVD():\n",
    "    def __init__(self, rating_data, F=5, alpha=0.1, lmbda=0.1, max_iter=100):\n",
    "        self.F = F           # 这个表示隐向量的维度\n",
    "        self.P = dict()          #  用户矩阵P  大小是[users_num, F]\n",
    "        self.Q = dict()     # 物品矩阵Q  大小是[item_nums, F]\n",
    "        self.bu = dict()   # 用户偏差系数\n",
    "        self.bi = dict()    # 物品偏差系数\n",
    "        self.mu = 0.0        # 全局偏差系数\n",
    "        self.alpha = alpha   # 学习率\n",
    "        self.lmbda = lmbda    # 正则项系数\n",
    "        self.max_iter = max_iter    # 最大迭代次数\n",
    "        self.rating_data = rating_data # 评分矩阵\n",
    "        \n",
    "        # 初始化矩阵P和Q, 方法很多， 一般用随机数填充， 但随机数大小有讲究， 根据经验， 随机数需要和1/sqrt(F)成正比\n",
    "        cnt = 0    # 统计总的打分数， 初始化mu用\n",
    "        for user, items in self.rating_data.items():\n",
    "            self.P[user] = [random.random() / math.sqrt(self.F)  for x in range(0, F)]\n",
    "            self.bu[user] = 0\n",
    "            cnt += len(items) \n",
    "            for item, rating in items.items():\n",
    "                if item not in self.Q:\n",
    "                    self.Q[item] = [random.random() / math.sqrt(self.F) for x in range(0, F)]\n",
    "                    self.bi[item] = 0\n",
    "        self.mu /= cnt\n",
    "        \n",
    "    # 有了矩阵之后， 就可以进行训练, 这里使用随机梯度下降的方式训练参数P和Q\n",
    "    def train(self):\n",
    "        for step in range(self.max_iter):\n",
    "            for user, items in self.rating_data.items():\n",
    "                for item, rui in items.items():\n",
    "                    rhat_ui = self.predict(user, item)   # 得到预测评分\n",
    "                    # 计算误差\n",
    "                    e_ui = rui - rhat_ui\n",
    "                    \n",
    "                    self.bu[user] += self.alpha * (e_ui - self.lmbda * self.bu[user])\n",
    "                    self.bi[item] += self.alpha * (e_ui - self.lmbda * self.bi[item])\n",
    "                    # 随机梯度下降更新梯度\n",
    "                    for k in range(0, self.F):\n",
    "                        self.P[user][k] += self.alpha * (e_ui*self.Q[item][k] - self.lmbda * self.P[user][k])\n",
    "                        self.Q[item][k] += self.alpha * (e_ui*self.P[user][k] - self.lmbda * self.Q[item][k])\n",
    "                    \n",
    "            self.alpha *= 0.1    # 每次迭代步长要逐步缩小\n",
    "    \n",
    "    # 预测user对item的评分， 这里没有使用向量的形式\n",
    "    def predict(self, user, item):\n",
    "        return sum(self.P[user][f] * self.Q[item][f] for f in range(0, self.F)) + self.bu[user] + self.bi[item] + self.mu   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def loadData():\n",
    "    rating_data={1: {'A': 5, 'B': 3, 'C': 4, 'D': 4},\n",
    "           2: {'A': 3, 'B': 1, 'C': 2, 'D': 3, 'E': 3},\n",
    "           3: {'A': 4, 'B': 3, 'C': 4, 'D': 3, 'E': 5},\n",
    "           4: {'A': 3, 'B': 3, 'C': 1, 'D': 5, 'E': 4},\n",
    "           5: {'A': 1, 'B': 5, 'C': 5, 'D': 2, 'E': 1}\n",
    "          }\n",
    "    return rating_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'math' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-5-029bec8e4f0a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mrandom\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mrating_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mloadData\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mbasicsvd\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mSVD\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrating_data\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mF\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[0mbasicsvd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'E'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-1-bfd554e2109b>\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, rating_data, F, alpha, lmbda, max_iter)\u001b[0m\n\u001b[0;32m     15\u001b[0m         \u001b[0mcnt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m    \u001b[1;31m# 统计总的打分数， 初始化mu用\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     16\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0muser\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mitems\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrating_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mP\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0muser\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mrandom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0mmath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msqrt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mF\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mF\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     18\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbu\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0muser\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     19\u001b[0m             \u001b[0mcnt\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-1-bfd554e2109b>\u001b[0m in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m     15\u001b[0m         \u001b[0mcnt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m    \u001b[1;31m# 统计总的打分数， 初始化mu用\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     16\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0muser\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mitems\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrating_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mP\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0muser\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mrandom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0mmath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msqrt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mF\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mF\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     18\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbu\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0muser\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     19\u001b[0m             \u001b[0mcnt\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'math' is not defined"
     ]
    }
   ],
   "source": [
    "import random\n",
    "import math\n",
    "rating_data = loadData()\n",
    "basicsvd = SVD(rating_data, F=10)\n",
    "basicsvd.train()\n",
    "for item in ['E']:\n",
    "    print(item, basicsvd.predict(1, item))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
